require(magrittr)
require(knitr)
require(dplyr)
require(tidyr)
require(tibble)
require(ggplot2)
require(Seurat)
require(cowplot)
require(kableExtra)
require(readr)
require(readxl)
require(pochi)
parent.directory <- "intermediate/"
parent.directory.mtx <- "filtered/"
We take our GC and antibody-secreting cell (ASC) identifiers from our orignal Seurat dataset. We then get the information from the original 10X files, only taking the cell identifiers from the GC and ASCs.
load(file = paste0(parent.directory, "sct_cluster_metadata.RData"))
GCPC_metadata %>%
filter(parent_cluster != "B PLCG2") -> GCPC_metadata
list_identifiers <- c("TC124.1", "TC124.2", "TC124.3", "TC125", "TC126")
cell_barcodes_list <- lapply(1:length(list_identifiers), function(i){
GCPC_metadata %>%
filter(orig_ident == list_identifiers[i]) %>%
select(identifier) %>%
mutate(raw_barcode = sub(paste0(list_identifiers[i], "_"), "", identifier)) %>%
pull(raw_barcode) -> raw_barcodes
return(raw_barcodes)
})
TC124.1.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit1"), strip.suffix = TRUE)
TC124.2.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit7"), strip.suffix = TRUE)
TC124.3.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit8"), strip.suffix = TRUE)
TC125.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit2"), strip.suffix = TRUE)
TC126.data <- Read10X(data.dir = paste0(parent.directory.mtx, "Amit3"), strip.suffix = TRUE)
B.names <- c("TC124.1", "TC124.2", "TC124.3", "TC125", "TC126")
B.list <- list(TC124.1.data, TC124.2.data, TC124.3.data, TC125.data, TC126.data)
rm(list = ls()[grepl(".data", ls(), fixed = TRUE)])
B.list <- lapply(1:length(B.list), function(i){
B.list[[i]] <- B.list[[i]][,cell_barcodes_list[[i]]]
colnames(B.list[[i]]) <- paste0(B.names[i], "_", colnames(B.list[[i]]))
return(CreateSeuratObject(B.list[[i]], project = B.names[[i]], min.cells = 3, min.features = 200))
})
Now that we have our new Seurat objects, we will add metadata and then will re-perform our SCTransform normalization and the Seurat v3 integration algorithm.
read_excel("genesets/dissociation_genes.xlsx") %>% select('Human ID') %>% na.omit %>% pull("Human ID") -> dissociation.genes
for(i in 1:length(B.list)){
temp.dissoc.genes <- dissociation.genes[dissociation.genes %in% rownames(B.list[[i]])]
B.list[[i]][["percent.dissoc"]] <- PercentageFeatureSet(B.list[[i]], features = temp.dissoc.genes)
B.list[[i]][["percent.mt"]] <- PercentageFeatureSet(B.list[[i]], pattern = "^MT-")
B.list[[i]][["donor"]] <- substring(B.list[[i]]$orig.ident, 1, 5)
}
#This removes the unused data thus far
rm(i, temp.dissoc.genes)
B.list <- list(merge(x = B.list[[1]], y = B.list[2:3]), B.list[[4]], B.list[[5]])
B.list <- lapply(1:length(B.list), function(i){
return(SCTransform(B.list[[i]], verbose = TRUE))
})
B.features <- SelectIntegrationFeatures(object.list = B.list, nfeatures = 3000)
B.list <- PrepSCTIntegration(object.list = B.list, anchor.features = B.features, verbose = TRUE)
B.anchors <- FindIntegrationAnchors(object.list = B.list, normalization.method = "SCT", anchor.features = B.features, dims = 1:40, verbose = TRUE)
B.gcpc <- IntegrateData(anchorset = B.anchors, normalization.method = "SCT", dims = 1:40, verbose = TRUE)
save(B.gcpc, GCPC_metadata, file = paste0(parent.directory, "sct_gcpc_integrated.RData"))
We perform PCA and UMAP and Louvain clustering just as before. We will also perform Louvain clustering at a single resolution, followed by scoring of specific gene sets just as in the original Seurat dataset.
load(file = paste0(parent.directory, "sct_gcpc_integrated.RData"))
B.gcpc[["SCT"]] <- NULL
IG.genes <- grep("^IGL|^IGK|^IGHV", B.gcpc@assays$integrated@var.features, value = TRUE)
B.gcpc@assays$integrated@var.features <- setdiff(B.gcpc@assays$integrated@var.features, c(IG.genes))
B.gcpc <- RunPCA(B.gcpc, npcs = 50, verbose = TRUE)
dims.to.use <- 1:30
B.gcpc <- BuildAnnoyUMAP(B.gcpc,
reduction = "pca",
metric = "euclidean",
dims = dims.to.use,
reduction.name = "pca_UMAP",
reduction_key = "pcaumap_",
save.graphs = TRUE)
DefaultAssay(B.gcpc) <- "RNA"
B.gcpc <- NormalizeData(B.gcpc)
B.gcpc <- FindClusters(B.gcpc, resolution = 0.5, graph.name = "annoy_nn_snn")
B.gcpc$previous_idents <- plyr::mapvalues(rownames(B.gcpc@meta.data), from = GCPC_metadata$identifier, to = as.character(GCPC_metadata$parent_cluster))
save(B.gcpc, file = paste0(parent.directory,"sct_seurat_gcpc_clustered.RData"))
We use the Seurat FindAllMarkers function and
SoupX to find two sets of marker genes for our new
clusters.
GCPC_RNA_markers <- FindAllMarkers(B.gcpc, assay = "RNA", logfc.threshold = 0.3, test.use = "LR", verbose = TRUE, only.pos = FALSE, latent.vars = "donor")
GCPC_SoupX_markers <- SoupX::quickMarkers(B.gcpc[["RNA"]]@data, B.gcpc$seurat_clusters, N = 20)
save(GCPC_RNA_markers, GCPC_SoupX_markers, file = paste0(parent.directory, "sct_seurat_gcpc_markers.RData"))
We can now visualize the new clusters and the previous clusters.
load(paste0(parent.directory,"sct_seurat_gcpc_clustered.RData"))
load(paste0(parent.directory,"sct_seurat_gcpc_markers.RData"))
DimPlot(B.gcpc, reduction = "pca_UMAP", label = TRUE, label.size = 5, label.box = TRUE, pt.size = 0.8, group.by = "previous_idents")
DimPlot(B.gcpc, reduction = "pca_UMAP", label = TRUE, label.size = 5, label.box = TRUE, pt.size = 0.8)
We will visualize the SoupX markers here.
cluster_list <- sort(unique(B.gcpc$seurat_clusters))
n_clusters <- length(cluster_list)
mrks <- SoupX::quickMarkers(B.gcpc[["RNA"]]@data, B.gcpc$seurat_clusters, N = 20)
## as(<dgCMatrix>, "dgTMatrix") is deprecated since Matrix 1.5-0; do as(., "TsparseMatrix") instead
mrks_list <- lapply(cluster_list, function(i){
mrks %>%
dplyr::filter(cluster == i) %>%
dplyr::pull(gene)
})
dotplot_list <- lapply(1:n_clusters, function(i) {
cluster_name <- cluster_list[i]
temp.slot <- paste0("Cluster ", cluster_name)
if(length(mrks_list[[{{i}}]]) == 0){return(NULL)}
src <- c("#### {{temp.slot}} {.unnumbered}",
"```{r soupdotplot-{{i}}, fig.width = 12, fig.height = 6}",
"DotPlot(B.gcpc, features = mrks_list[[{{i}}]], assay = \"RNA\")+RotatedAxis()",
"```",
"")
knit_expand(text = src)
})
out <- knit_child(text = unlist(dotplot_list), options = list(echo = FALSE, cache = FALSE))
DefaultAssay(B.gcpc) <- "RNA"
cluster_list <- levels(GCPC_RNA_markers$cluster)
n_clusters <- length(cluster_list)
gene_list <- lapply(cluster_list, function(i){
GCPC_RNA_markers %>%
dplyr::filter(p_val_adj < 0.01) %>%
dplyr::filter(cluster == i) %>%
dplyr::top_n(20, wt = avg_log2FC) %>%
dplyr::pull(gene)
})
dotplot_list <- lapply(1:n_clusters, function(i) {
cluster_name <- cluster_list[i]
temp.slot <- paste0("Cluster ", cluster_name)
if(length(gene_list[[i]]) == 0 ){
return(NULL)
}
src <- c("#### {{temp.slot}} {.unnumbered}",
"```{r dotplot-{{i}}, fig.width = 12, fig.height = 6}",
"DotPlot(B.gcpc, features = gene_list[[{{i}}]])+RotatedAxis()+scale_color_viridis_c(direction = -1, option = \"A\")",
"```",
"")
knit_expand(text = src)
})
out <- knit_child(text = unlist(dotplot_list), options = list(echo = FALSE, cache = FALSE))
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
We include tables of the marker genes found in each cluster.
cluster_vector <- levels(B.gcpc@active.ident)
n_clusters <- length(cluster_vector)
markers.list <- lapply(1:n_clusters, function(i) {
GCPC_RNA_markers %>%
dplyr::filter(cluster == cluster_vector[i] & p_val_adj < 0.01) %>%
dplyr::arrange(-avg_log2FC) %>%
dplyr::select(cluster, gene, avg_log2FC, p_val, p_val_adj) %>%
dplyr::mutate_if(is.numeric, format, digits = 3)
})
names(markers.list) <- paste("Cluster", cluster_vector)
markers.list <- c(markers.list, "ALL" = list(GCPC_RNA_markers %>% dplyr::arrange(-avg_log2FC) %>% select(cluster, gene, avg_log2FC, p_val, p_val_adj) %>% mutate_if(is.numeric, format, digits = 3)))
src_list <- lapply(1:(n_clusters+1), function(i) {
if(i == (n_clusters+1)){
label <- "ALL"
} else {
label <- cluster_vector[i]
}
src <- c("#### {{label}} {.unnumbered} ",
"<div style = \"width:65%; height:auto; align:left\">",
"```{r marker-cluster-{{label}}, fig.width=3}",
"DT::datatable(markers.list[[{{i}}]], rownames = FALSE)",
"```",
"</div>",
"")
knit_expand(text = src)
})
out <- knit_child(text = unlist(src_list), options = list(echo = FALSE, cache = FALSE))
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
The cluster identities remain largely the same as before.
#load(file = paste0(parent.directory, "sct_seurat_gcpc_clustered.RData"))
cluster_levels <- paste0("C",c(1,8,5,11,7,6,4,9,2,3,10,0))
cluster_names <- c("GC 1",
"GC 2",
"GC 3",
"GC IgA",
"LZ",
"DZ 1",
"DZ 2",
"DZ 3",
"DZ 4",
"GC LMO2",
"ASC IgM",
"ASC IgG")
B.gcpc$new_clusters_nums <- factor(paste0("C", as.character(B.gcpc$annoy_nn_snn_res.0.5)),
levels = cluster_levels)
B.gcpc$cluster_names <- plyr::mapvalues(B.gcpc$new_clusters_nums,
from = cluster_levels,
to = cluster_names)
Idents(B.gcpc) <- "cluster_names"
DimPlot(B.gcpc, label = TRUE, label.box = TRUE, repel = TRUE, cols = c("dodgerblue", "forestgreen", RColorBrewer::brewer.pal(11, "Set3")))+NoLegend()
save(B.gcpc, file = paste0(parent.directory, "sct_seurat_gcpc_named_clusters.RData"))
load(file = paste0(parent.directory, "sct_seurat_gcpc_named_clusters.RData"))
DimPlot(B.gcpc, label = TRUE, label.box = TRUE, repel = TRUE, cols = c("dodgerblue", "forestgreen", RColorBrewer::brewer.pal(11, "Set3")))+NoLegend()
sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] pochi_0.1.0 readxl_1.4.1 readr_2.1.3 kableExtra_1.3.4
## [5] cowplot_1.1.1 sp_1.5-0 SeuratObject_4.1.2 Seurat_4.2.0
## [9] ggplot2_3.3.6 tibble_3.1.8 tidyr_1.2.1 dplyr_1.0.10
## [13] knitr_1.40 magrittr_2.0.3
##
## loaded via a namespace (and not attached):
## [1] SoupX_1.6.1 Rtsne_0.16 colorspace_2.0-3
## [4] deldir_1.0-6 ellipsis_0.3.2 ggridges_0.5.4
## [7] rstudioapi_0.14 spatstat.data_2.2-0 farver_2.1.1
## [10] leiden_0.4.3 listenv_0.8.0 DT_0.25
## [13] ggrepel_0.9.1 fansi_1.0.3 xml2_1.3.3
## [16] codetools_0.2-18 splines_4.1.1 cachem_1.0.6
## [19] polyclip_1.10-0 jsonlite_1.8.2 ica_1.0-3
## [22] cluster_2.1.4 png_0.1-7 rgeos_0.5-9
## [25] uwot_0.1.14 shiny_1.7.2 sctransform_0.3.5
## [28] spatstat.sparse_2.1-1 compiler_4.1.1 httr_1.4.4
## [31] assertthat_0.2.1 Matrix_1.5-1 fastmap_1.1.0
## [34] lazyeval_0.2.2 cli_3.4.1 later_1.3.0
## [37] htmltools_0.5.3 tools_4.1.1 igraph_1.3.5
## [40] gtable_0.3.1 glue_1.6.2 RANN_2.6.1
## [43] reshape2_1.4.4 Rcpp_1.0.9 scattermore_0.8
## [46] cellranger_1.1.0 jquerylib_0.1.4 vctrs_0.4.2
## [49] svglite_2.1.0 nlme_3.1-159 progressr_0.11.0
## [52] crosstalk_1.2.0 lmtest_0.9-40 spatstat.random_2.2-0
## [55] xfun_0.33 stringr_1.4.1 globals_0.16.1
## [58] rvest_1.0.3 mime_0.12 miniUI_0.1.1.1
## [61] lifecycle_1.0.3 irlba_2.3.5.1 goftest_1.2-3
## [64] future_1.28.0 MASS_7.3-58.1 zoo_1.8-11
## [67] scales_1.2.1 spatstat.core_2.4-4 hms_1.1.2
## [70] promises_1.2.0.1 spatstat.utils_2.3-1 parallel_4.1.1
## [73] RColorBrewer_1.1-3 yaml_2.3.5 reticulate_1.26
## [76] pbapply_1.5-0 gridExtra_2.3 sass_0.4.2
## [79] rpart_4.1.16 stringi_1.7.8 highr_0.9
## [82] systemfonts_1.0.4 rlang_1.0.6 pkgconfig_2.0.3
## [85] matrixStats_0.62.0 evaluate_0.17 lattice_0.20-45
## [88] ROCR_1.0-11 purrr_0.3.5 tensor_1.5
## [91] labeling_0.4.2 patchwork_1.1.2 htmlwidgets_1.5.4
## [94] tidyselect_1.2.0 parallelly_1.32.1 RcppAnnoy_0.0.19
## [97] plyr_1.8.7 R6_2.5.1 generics_0.1.3
## [100] DBI_1.1.3 mgcv_1.8-40 pillar_1.8.1
## [103] withr_2.5.0 fitdistrplus_1.1-8 survival_3.4-0
## [106] abind_1.4-5 future.apply_1.9.1 KernSmooth_2.23-20
## [109] utf8_1.2.2 spatstat.geom_2.4-0 plotly_4.10.0
## [112] tzdb_0.3.0 rmarkdown_2.17 grid_4.1.1
## [115] data.table_1.14.2 webshot_0.5.4 digest_0.6.29
## [118] xtable_1.8-4 httpuv_1.6.6 munsell_0.5.0
## [121] viridisLite_0.4.1 bslib_0.4.0